/*
*  This file is part of ygg-brute
*  Copyright (c) 2020 ygg-brute authors
*  See LICENSE for licensing information
*/

/*
 * Transposes a scalar given as eight 32-bit limbs into 32 one-byte "folds".
 *
 * For every i in 0..31 and j in 0..7, bit j of folds[i] is bit i of limbs[j].
 *
 * folds: output buffer; exactly 32 bytes (folds[0]..folds[31]) are written.
 * limbs: input; limbs[0]..limbs[7] are read and left unmodified.
 */
DECLSPEC inline void scalar_to_folds(uint8_t folds[32], const uint32_t limbs[8])
{
    for (unsigned i = 0; i < 32; ++i) {
        /* Collect bit i of each limb; limb j lands in bit j of the fold. */
        uint32_t fold = 0;
        for (unsigned j = 0; j < 8; ++j) {
            fold |= ((limbs[j] >> i) & 1u) << j;
        }
        /* Only the low 8 bits can be set, so the narrowing is lossless. */
        folds[i] = (uint8_t)fold;
    }
}

/*
 * scalar_mul_folds(P, S, TABLE)
 *
 * Resets P via point_identity(), splits S into 32 folds with
 * scalar_to_folds(), then walks the folds from index 31 down to 0: the entry
 * TABLE[fold] is added into P for every fold, with one doubling of P between
 * consecutive additions (31 doublings, 32 additions in total).
 *
 * P:     point accumulator, passed on to the point_* helpers as-is.
 * S:     scalar limbs, forwarded to scalar_to_folds().
 * TABLE: array indexed by a fold byte, so indices up to 255 can be read.
 *
 * NOTE(review): presumably the point_* helpers implement group
 * identity/addition/doubling, making this an MSB-first double-and-add --
 * confirm against their definitions.
 *
 * P and TABLE are expanded more than once; pass expressions without side
 * effects.
 */
#define scalar_mul_folds(P, S, TABLE)                                 \
    do                                                                \
    {                                                                 \
        uint8_t _sm_folds[32];                                        \
                                                                      \
        point_identity(P);                                            \
        scalar_to_folds(_sm_folds, (S));                              \
                                                                      \
        /* Top fold first: nothing to double yet. */                  \
        point_add_inplace((P), &(TABLE)[_sm_folds[31]]);              \
                                                                      \
        for (int _sm_i = 30; _sm_i >= 0; --_sm_i)                     \
        {                                                             \
            point_double_inplace((P));                                \
            point_add_inplace((P), &(TABLE)[_sm_folds[_sm_i]]);       \
        }                                                             \
    } while (0)

/*
 * load_fold_table(DST, SRC)
 *
 * Copies part of a 256-entry table from SRC to DST. The invoking caller
 * handles the indices local_id() + k * local_size() that are below 256:
 * first 256 / local_size() full rounds, then at most one element of the
 * leftover tail. Callers whose local_id() values cover 0..local_size()-1
 * therefore copy all 256 entries, each exactly once.
 *
 * local_size() must be non-zero (it is used as a divisor).
 *
 * NOTE(review): no synchronization is performed here; presumably the caller
 * must issue a barrier before reading entries copied by other work-items --
 * confirm at the call sites.
 */
#define load_fold_table(DST, SRC)                                          \
    do                                                                     \
    {                                                                      \
        const size_t _ft_me = local_id();                                  \
        const size_t _ft_stride = local_size();                            \
        const size_t _ft_rounds = 256 / _ft_stride;                        \
        const size_t _ft_tail = _ft_rounds * _ft_stride;                   \
        size_t _ft_idx = _ft_me;                                           \
                                                                           \
        /* Full rounds: every caller has an element in each of them. */    \
        for (size_t _ft_r = 0; _ft_r < _ft_rounds; ++_ft_r)                \
        {                                                                  \
            (DST)[_ft_idx] = (SRC)[_ft_idx];                               \
            _ft_idx += _ft_stride;                                         \
        }                                                                  \
                                                                           \
        /* Leftover tail: only the first (256 - _ft_tail) callers copy. */ \
        if (_ft_me < 256 - _ft_tail)                                       \
        {                                                                  \
            (DST)[_ft_tail + _ft_me] = (SRC)[_ft_tail + _ft_me];           \
        }                                                                  \
    } while (0)